"""
Nae-Chyun Chen
Johns Hopkins University
2021-2023
"""

import argparse
import logging
import pathlib
import subprocess
from typing import Union

logging.basicConfig(
    format="[%(asctime)s %(pathname)s:%(lineno)d %(levelname)s] %(message)s",
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

SKIP_MSG = "skip"


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-t",
        "--num_threads",
        type=int,
        default=4,
        help="Number of threads to use",
    )
    parser.add_argument(
        "-s",
        "--sequence_type",
        type=str,
        required=True,
        choices=["ilmn_pe", "ilmn_se", "pb_hifi", "ont"],
        help="Type of sequence data",
    )
    parser.add_argument(
        "-a",
        "--aligner",
        type=str,
        required=True,
        choices=[
            "bowtie2",
            "bwamem",
            "bwamem2",
            "minimap2",
            "winnowmap2",
            "strobealign",
        ],
    )
    parser.add_argument(
        "--leviosam2_exe",
        default="leviosam2",
        type=str,
        help="Path to the leviosam2 executable",
    )
    parser.add_argument(
        "--samtools_exe",
        default="samtools",
        type=str,
        help="Path to the samtools executable",
    )
    parser.add_argument(
        "--bgzip_exe",
        default="bgzip",
        type=str,
        help="Path to the bgzip executable",
    )
    parser.add_argument(
        "--gnu_time_exe",
        default="gtime",
        type=str,
        help=(
            "Path to the GNU Time executable "
            "(see https://www.gnu.org/software/time/)"
        ),
    )
    parser.add_argument(
        "--measure_time",
        action="store_true",
        help="Activate to measure time with `--gnu_time_exe`",
    )
    parser.add_argument(
        "--keep_tmp_files",
        action="store_true",
        help="Activate to keep temp files generated by the workflow",
    )
    parser.add_argument(
        "--aligner_exe",
        type=str,
        default="auto",
        help=(
            "Path to the aligner executable. If empty, inferred using `--aligner`"
        ),
    )
    parser.add_argument(
        "--source_label",
        type=str,
        default="source",
        help="Label of the source reference",
    )
    parser.add_argument(
        "--target_label",
        type=str,
        default="target",
        help="Label of the target reference",
    )
    parser.add_argument(
        "--read_group", type=str, default="", help="Read group string"
    )
    parser.add_argument(
        "-g",
        "--lift_max_gap",
        type=int,
        help="[lift] Max chain gap size allowed",
        default=None,
    )
    parser.add_argument(
        "--lift_commit_min_mapq",
        type=int,
        help="[lift] Min MAPQ to commit",
        default=None,
    )
    parser.add_argument(
        "--lift_commit_min_score",
        type=int,
        help="[lift] Min alignment score (AS:i tag) to commit",
        default=None,
    )
    parser.add_argument(
        "--lift_commit_max_frac_clipped",
        type=float,
        help=(
            "[lift] Max fraction of clipped bases "
            "(1- bam_cigar2rlen / seq_length) to commit; "
            "when higher, a read is deferred."
        ),
        default=None,
    )
    parser.add_argument(
        "--lift_commit_max_isize",
        type=int,
        help="[lift] Max template length (isize) to commit",
        default=None,
    )
    parser.add_argument(
        "--lift_commit_max_hdist",
        type=int,
        help="[lift] Max edit distance (NM:i tag) to commit",
        default=None,
    )
    parser.add_argument(
        "--lift_bed_commit_source",
        type=str,
        help=(
            "[lift] Path to a BED (source coordinates) "
            "where reads in the regions are always "
            "committed (often for suppress annotations)"
        ),
        default=None,
    )
    parser.add_argument(
        "--lift_bed_defer_target",
        type=str,
        help=(
            "[lift] Path to a BED (target cooridnates)"
            "where reads in the regions are always "
            "deferred"
        ),
        default=None,
    )
    parser.add_argument(
        "--lift_realign_config",
        type=str,
        help=("[lift] Path to the config file for realignment"),
        default=None,
    )
    parser.add_argument(
        "-i",
        "--input_bam",
        type=str,
        required=True,
        help="Path to the input SAM/BAM/CRAM file",
    )
    parser.add_argument(
        "-o", "--out_prefix", type=str, required=True, help="Output prefix"
    )
    parser.add_argument(
        "-O",
        "--out_format",
        type=str,
        default="bam",
        choices=["bam", "sam", "cram"],
        help="Output file format.",
    )
    parser.add_argument(
        "-C",
        "--leviosam2_index",
        type=str,
        required=True,
        help="Path to the leviosam2 index (a .clft file)",
    )
    parser.add_argument(
        "-f",
        "--target_fasta",
        type=str,
        required=True,
        help="Path to the target reference (FASTA file)",
    )
    parser.add_argument(
        "-fi",
        "--target_aligner_index",
        type=str,
        default="",
        help="Path to the target reference index for `--aligner`",
    )
    # parser.add_argument(
    #     "-s",
    #     "--source_fasta",
    #     type=str,
    #     help="Path to the source reference (FASTA file)",
    # )
    # parser.add_argument(
    #     "-si",
    #     "--source_fasta_index",
    #     type=str,
    #     help=("Path to the source reference index " "for `--aligner`"),
    # )
    parser.add_argument(
        "--use_preset",
        action="store_true",
        help="Use default workflow parameters",
    )
    parser.add_argument(
        "--dryrun", action="store_true", help="Activate the dryrun mode"
    )
    parser.add_argument(
        "--forcerun",
        action="store_true",
        help="Activate the forcerun mode. Rerun everything",
    )

    args = parser.parse_args()
    return args


def check_file_exists(fn: pathlib.Path) -> None:
    """Checks if a path is a file. Raises an error if not."""
    if not fn.is_file():
        raise FileNotFoundError(f"{fn} is not a file")


def validate_executable(cmd: str, lenient: bool = False) -> None:
    """Validates if an executable is valid.

    Args:
        cmd: command to run
        lenient: lenient mode.
            If False, return True a returncode of 0. If True, return True
            when either stdout/stderr has >1 lines of output.
            When a command does not exist, we get a "command not found"
            stderr result, so we require >1 lines of output.

    Returns:
        bool: True if a binary is valid
    """
    subprocess_out = subprocess.run([cmd], shell=True, capture_output=True)
    if not lenient and subprocess_out.returncode != 0:
        raise ValueError(
            f"`{cmd}` has a non-zero return code `{subprocess_out.returncode}`"
        )
    else:

        def _count_lines(bin_text):
            return bin_text.decode("utf-8").count("\n")

        if not (
            _count_lines(subprocess_out.stdout) > 1
            or _count_lines(subprocess_out.stderr) > 1
        ):
            raise ValueError(
                f"`{cmd}` has an unexpected output:\n"
                "```\n"
                f"STDOUT = {subprocess_out.stdout}\n"
                f"STDERR = {subprocess_out.stderr}\n"
                "```"
            )


class Leviosam2Workflow:
    """LevioSAM2 workflow object."""

    def _infer_aligner_exe(self):
        """Infers aligner executable name."""
        if self.aligner in ["bowtie2", "minimap2", "winnowmap2", "strobealign"]:
            self.aligner_exe = self.aligner
        elif self.aligner == "bwamem":
            self.aligner_exe = "bwa"
        elif self.aligner == "bwamem2":
            self.aligner_exe = "bwa-mem2"
        else:
            raise KeyError(f"Unsupported aligner: {self.aligner}")

    def validate_executables(self) -> None:
        validate_executable(cmd=f"{self.samtools} --version")
        validate_executable(cmd=f"{self.bgzip} --version")
        if self.measure_time:
            validate_executable(cmd=f"{self.gtime} --version")
        validate_executable(cmd=f"{self.leviosam2}", lenient=True)
        validate_executable(cmd=f"{self.aligner_exe}", lenient=True)

    def run_leviosam2(self) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
        """Run leviosam2."""
        lift_commit_min_mapq_arg = (
            f"-S mapq:{self.lift_commit_min_mapq} "
            if self.lift_commit_min_mapq
            else ""
        )
        lift_commit_min_score_arg = (
            f"-S aln_score:{self.lift_commit_min_score} "
            if self.lift_commit_min_score
            else ""
        )
        lift_commit_max_frac_clipped_arg = (
            f"-S clipped_frac:{self.lift_commit_max_frac_clipped} "
            if self.lift_commit_max_frac_clipped
            else ""
        )
        lift_commit_max_isize_arg = (
            f"-S isize:{self.lift_commit_max_isize} "
            if self.lift_commit_max_isize
            else ""
        )
        lift_commit_max_hdist_arg = (
            f"-S hdist:{self.lift_commit_max_hdist} "
            if self.lift_commit_max_hdist
            else ""
        )
        lift_max_gap_arg = (
            f"-G {self.lift_max_gap} " if self.lift_max_gap else ""
        )
        lift_bed_commit_source_arg = (
            f"-r {self.lift_bed_commit_source} "
            if self.lift_bed_commit_source
            else ""
        )
        lift_bed_defer_target_arg = (
            f"-D {self.lift_bed_defer_target} "
            if self.lift_bed_defer_target
            else ""
        )
        lift_realign_config_arg = (
            f"-x {self.lift_realign_config} "
            if self.lift_realign_config
            else ""
        )

        cmd = (
            f"{self.time_cmd}"
            f"{self.leviosam2} lift -C {self.leviosam2_index} -O bam "
            f"-a {self.input_bam} -p {self.out_prefix} "
            f"-t {self.num_threads} -m -f {self.target_fasta} "
            f"{lift_commit_min_mapq_arg}"
            f"{lift_commit_min_score_arg}"
            f"{lift_commit_max_frac_clipped_arg}"
            f"{lift_commit_max_isize_arg}"
            f"{lift_commit_max_hdist_arg}"
            f"{lift_max_gap_arg}"
            f"{lift_bed_commit_source_arg}"
            f"{lift_bed_defer_target_arg}"
            f"{lift_realign_config_arg}"
        )
        if self.dryrun:
            return cmd
        else:
            check_file_exists(self.input_bam)
            if (not self.forcerun) and self._fn_committed.is_file():
                logger.info(
                    f"Skip run_leviosam2 -- {self._fn_committed} exists"
                )
                return SKIP_MSG
            logger.info(cmd)
            return subprocess.run([cmd], shell=True)

    def run_sort_committed(
        self,
    ) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
        """Sort the committed BAM.

        Subprocess inputs:
            - <self.out_prefix>-committed.bam
        Subprocess outputs:
            - <self.out_prefix>-committed-sorted.bam
        """
        cmd = (
            f"{self.time_cmd}"
            f"{self.samtools} sort -@ {self.num_threads} "
            f"-o {self._fn_committed_sorted} {self._fn_committed}"
        )
        if self.dryrun:
            return cmd
        else:
            check_file_exists(self._fn_committed)
            if (not self.forcerun) and self._fn_committed_sorted.is_file():
                logger.info(
                    f"Skip run_sort_committed -- {self._fn_committed_sorted} exists"
                )
                return SKIP_MSG
            logger.info(cmd)
            return subprocess.run([cmd], shell=True)

    def run_collate_pe(
        self,
    ) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
        """[Paired-end] Collate committed/deferred BAMs to properly paired FASTQs.

        Subprocess inputs:
            - <self.out_prefix>-committed-sorted.bam
            - <self.out_prefix>-deferred.bam
        Subprocess outputs:
            - <self.out_prefix>-paired-deferred-R1.fq.gz
            - <self.out_prefix>-paired-deferred-R2.fq.gz
        """
        cmd = (
            f"{self.time_cmd}"
            f"{self.leviosam2} collate "
            f"-a {self._fn_committed_sorted} -b {self._fn_deferred} "
            f"-p {self._prefix_deferred_pe}"
        )
        if self.dryrun:
            return cmd
        else:
            check_file_exists(self._fn_committed_sorted)
            check_file_exists(self._fn_deferred)

            if (
                (not self.forcerun)
                and self._fn_deferred_pe_fq1.is_file()
                and self._fn_deferred_pe_fq2.is_file()
            ):
                logger.info(
                    f"Skip run_collate_pe -- both {self._fn_deferred_pe_fq1} and "
                    f"{self._fn_deferred_pe_fq2} exist"
                )
                return SKIP_MSG
            logger.info(cmd)
            return subprocess.run([cmd], shell=True)

    # def run_intial_align(
    #     self,
    # ) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
    #     """Align raw reads.

    #     Thread resource allocation:
    #         - paired-end: alignment=`num_threads`-1, view=1
    #         - single-end: alignment=`num_threads`-sort, sort=`num_threads//5`

    #     Output file:
    #         - self.input_bam
    #     """

    #     if self.is_single_end:
    #         num_threads_sort = max(1, self.num_threads // 5)
    #         num_threads_aln = max(1, self.num_threads - num_threads_sort)
    #         fn_out = self.input_bam
    #     pass

    #         if self.aligner == "bowtie2":
    #             reads = f"-U {self.#TODO}"
    #         else:
    #             reads = f"{self.#TODO}"

    #         cmd_samtools = (
    #             f"{self.time_cmd}"
    #             f"{self.samtools} sort -@ {num_threads_aln} -o {fn_out}"
    #         )
    #     else:
    #         num_threads_aln = max(1, self.num_threads - 1)
    #         if self.aligner not in ["bowtie2", "bwamem", "bwamem2", "strobealign"]:
    #             raise ValueError(
    #                 "We have not supported paired-end "
    #                 f"mode for aligner {self.aligner}"
    #             )
    #         fn_out = self._fn_deferred_realigned_pe

    #         if self.aligner == "bowtie2":
    #             reads = f"-1 {self.#TODO} " f"-2 {self.#TODO}"
    #         else:
    #             reads = f"{self.#TODO} " f"{self.#TODO}"

    #         cmd_samtools = f"{self.time_cmd} {self.samtools} view -hbo {fn_out}"

    #     if self.aligner == "bowtie2":
    #         cmd = (
    #             f"{self.time_cmd}"
    #             f"{self.aligner_exe} {self.read_group} "
    #             f"-p {num_threads_aln} -x {self.#TODO} {reads} | "
    #             f"{cmd_samtools}"
    #         )
    #     elif self.aligner in ["bwamem", "bwamem2"]:
    #         if self.read_group != "":
    #             self.read_group = f"-R {self.read_group}"
    #         cmd = (
    #             f"{self.time_cmd}"
    #             f"{self.aligner_exe} mem {self.read_group} "
    #             f"-t {num_threads_aln} {self.#TODO} {reads} | "
    #             f"{cmd_samtools}"
    #         )
    #     elif self.aligner == "strobealign":
    #         if self.read_group != "":
    #             self.read_group = f"--rg {self.read_group}"
    #         cmd = (
    #             f"{self.time_cmd}"
    #             f"{self.aligner_exe} {self.read_group} "
    #             f"-t {num_threads_aln} {self.#TODO} {reads} | "
    #             f"{cmd_samtools}"
    #         )
    #     elif self.aligner in ["minimap2", "winnowmap2"]:
    #         # Do not use a prefix other than 'map-hifi' and 'map-ont' modes.
    #         preset = ""
    #         if self.sequence_type == "pb-hifi":
    #             preset = "-x map-hifi"
    #         elif self.sequence_type == "ont":
    #             preset = "-x map-ont"
    #         if self.read_group != "":
    #             self.read_group = f"-R {self.read_group}"
    #         cmd = (
    #             f"{self.time_cmd}"
    #             f"{self.aligner_exe} {self.read_group} -a {preset} --MD "
    #             f"-t {num_threads_aln} {self.#TODO} {reads} | "
    #             f"{cmd_samtools}"
    #         )

    #     if self.dryrun:
    #         return cmd
    #     else:
    #         check_file_exists(self.#TODO)
    #         check_file_exists(self.#TODO)

    #         if (not self.forcerun) and fn_out.is_file():
    #             print(f"[Info] Skip run_intial_align -- {fn_out} exists")
    #             return SKIP_MSG
    #         return subprocess.run([cmd], shell=True)

    @staticmethod
    def _allocate_aln_thread(num_threads: int, if_sort: bool = True):
        if if_sort:
            num_threads_samtools = max(1, num_threads // 5)
            num_threads_aln = max(1, num_threads - num_threads_samtools)
        else:
            num_threads_aln = max(1, num_threads - 1)
            num_threads_samtools = 1
        return num_threads_aln, num_threads_samtools

    def run_realign_deferred(
        self,
    ) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
        """Re-align deferred reads.

        Aligned reads are piped to `samtools sort` for single-end reads.
        Paired-end reads need to be collated later so just compressed.
        Thread resource allocation:
            - paired-end: alignment=`num_threads`-1, view=1
            - single-end: alignment=`num_threads`-sort, sort=`num_threads//5`
        """
        # Paired-end deferred reads do not need sorting
        num_threads_aln, num_threads_samtools = self._allocate_aln_thread(
            num_threads=self.num_threads, if_sort=self.is_single_end
        )
        if self.is_single_end:
            fn_out = self._fn_deferred_realigned_se

            if self.aligner == "bowtie2":
                reads = f"-U {self._fn_deferred_fq_se}"
            else:
                reads = f"{self._fn_deferred_fq_se}"

            cmd_samtools = (
                f"{self.time_cmd}"
                f"{self.samtools} sort -@ {num_threads_samtools} -o {fn_out}"
            )
        else:
            if self.aligner not in [
                "bowtie2",
                "bwamem",
                "bwamem2",
                "strobealign",
            ]:
                raise ValueError(
                    "We have not supported paired-end "
                    f"mode for aligner {self.aligner}"
                )
            fn_out = self._fn_deferred_realigned_pe

            if self.aligner == "bowtie2":
                reads = f"-1 {self._fn_deferred_pe_fq1} -2 {self._fn_deferred_pe_fq2}"
            else:
                reads = f"{self._fn_deferred_pe_fq1} {self._fn_deferred_pe_fq2}"

            cmd_samtools = f"{self.time_cmd} {self.samtools} view -hbo {fn_out}"

        if self.aligner == "bowtie2":
            if not self.target_aligner_index:
                raise ValueError(
                    f"`{self.target_aligner_index}` is not a valid bowtie2 index path"
                )
            cmd = (
                f"{self.time_cmd}"
                f"{self.aligner_exe} {self.read_group} "
                f"-p {num_threads_aln} -x {self.target_aligner_index} {reads} | "
                f"{cmd_samtools}"
            )
        elif self.aligner in ["bwamem", "bwamem2"]:
            if self.read_group != "":
                self.read_group = f"-R {self.read_group}"
            cmd = (
                f"{self.time_cmd}"
                f"{self.aligner_exe} mem {self.read_group} "
                f"-t {num_threads_aln} {self.target_aligner_index} {reads} | "
                f"{cmd_samtools}"
            )
        elif self.aligner == "strobealign":
            if self.read_group != "":
                self.read_group = f"--rg {self.read_group}"
            cmd = (
                f"{self.time_cmd}"
                f"{self.aligner_exe} {self.read_group} "
                f"-t {num_threads_aln} {self.target_aligner_index} {reads} | "
                f"{cmd_samtools}"
            )
        elif self.aligner in ["minimap2", "winnowmap2"]:
            # Do not use a prefix other than 'map-hifi' and 'map-ont' modes.
            preset = ""
            if self.sequence_type == "pb-hifi":
                preset = "-x map-hifi"
            elif self.sequence_type == "ont":
                preset = "-x map-ont"
            if self.read_group != "":
                self.read_group = f"-R {self.read_group}"
            cmd = (
                f"{self.time_cmd}"
                f"{self.aligner_exe} {self.read_group} -a {preset} --MD "
                f"-t {num_threads_aln} {self.target_aligner_index} {reads} | "
                f"{cmd_samtools}"
            )

        if self.dryrun:
            return cmd
        else:
            if self.is_single_end:
                check_file_exists(self._fn_deferred_fq_se)
            elif not self.is_single_end:
                check_file_exists(self._fn_deferred_pe_fq1)
                check_file_exists(self._fn_deferred_pe_fq2)

            if (not self.forcerun) and fn_out.is_file():
                logger.info(f"Skip run_realign_deferred -- {fn_out} exists")
                return SKIP_MSG
            logger.info(cmd)
            return subprocess.run([cmd], shell=True)

    def run_refflow_merge_pe(
        self,
    ) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
        """(Paired-end) reference flow-style merging of deferred/realigned BAMs.

        Subprocess inputs:
            - self._fn_deferred_realigned_pe
            - self._fn_deferred_pe
        Subprocess outputs:
            - (tmp) self._fn_deferred_realigned_pe_sortn
            - (tmp) self._fn_deferred_pe_sortn
            - self._fn_deferred_reconciled
        """
        cmd = ""
        cmd += (
            f"{self.time_cmd}"
            f"{self.samtools} sort -@ {self.num_threads} -n "
            f"-o {self._fn_deferred_realigned_pe_sortn} "
            f"{self._fn_deferred_realigned_pe} && "
        )
        cmd += (
            f"{self.time_cmd}"
            f"{self.samtools} sort -@ {self.num_threads} -n "
            f"-o {self._fn_deferred_pe_sortn} {self._fn_deferred_pe} && "
        )
        cmd += (
            f"{self.time_cmd}"
            f"{self.leviosam2} reconcile "
            f"-s {self.source_label}:{self._fn_deferred_pe_sortn} "
            f"-s {self.target_label}:{self._fn_deferred_realigned_pe_sortn} "
            f"-m -o - | "
            f"{self.time_cmd}"
            f"{self.samtools} sort -@ {self.num_threads} "
            f"-o {self._fn_deferred_reconciled}"
        )

        if self.dryrun:
            return cmd
        else:
            check_file_exists(self._fn_deferred_realigned_pe)
            check_file_exists(self._fn_deferred_pe)

            if (not self.forcerun) and self._fn_deferred_reconciled.is_file():
                logger.info(
                    "Skip run_refflow_merge_pe -- "
                    f"{self._fn_deferred_reconciled} exists"
                )
                return SKIP_MSG
            logger.info(cmd)
            return subprocess.run([cmd], shell=True)

    def run_merge_and_index(
        self,
    ) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
        """Merge all processed BAMs and index."""
        if not self.is_single_end:
            fn_committed_sorted = self._fn_committed_pe
            fn_in_deferred = self._fn_deferred_reconciled
        else:
            fn_committed_sorted = self._fn_committed_sorted
            fn_in_deferred = self._fn_deferred_realigned_se

        cmd = ""
        cmd += (
            f"{self.time_cmd}"
            f"{self.samtools} merge -f -@ {self.num_threads} "
            f"-o {self._fn_final} {fn_committed_sorted} {fn_in_deferred} && "
        )
        cmd += f"{self.time_cmd}" f"{self.samtools} index {self._fn_final}"

        if self.dryrun:
            return cmd
        else:
            check_file_exists(self._fn_committed_sorted)
            check_file_exists(fn_in_deferred)

            if (not self.forcerun) and self._fn_final.is_file():
                logger.info(
                    f"Skip run_merge_and_index -- {self._fn_final} exists"
                )
                return SKIP_MSG
            logger.info(cmd)
            return subprocess.run([cmd], shell=True)

    def run_bam_to_fastq_se(
        self,
    ) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
        """(Single-end) Convert deferred BAM to FASTQ."""
        cmd = (
            f"{self.time_cmd}{self.samtools} fastq {self._fn_deferred} | "
            f"{self.time_cmd}{self.bgzip} > {self._fn_deferred_fq_se}"
        )

        if self.dryrun:
            return cmd
        else:
            check_file_exists(self._fn_deferred)

            if (not self.forcerun) and self._fn_deferred_fq_se.is_file():
                logger.info(
                    f"Skip run_bam_to_fastq_se -- {self._fn_deferred_fq_se} exists"
                )
                return SKIP_MSG
            logger.info(cmd)
            return subprocess.run([cmd], shell=True)

    def run_bam_to_cram(
        self,
    ) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
        """Converts the final BAM file to the CRAM format.

        We have not supported writing to the CRAM format in the leviosam2
        software. This function converts the final BAM file to the CRAM format.
        """
        fn_final_cram = self._fn_final.with_suffix(".cram")
        cmd = (
            f"{self.time_cmd}{self.samtools} view -T {self.target_fasta} "
            f"-O cram -o {fn_final_cram} {self._fn_final} && "
        )
        cmd += f"{self.time_cmd}" f"{self.samtools} index {fn_final_cram}"

        if self.dryrun:
            return cmd
        else:
            logger.info(cmd)
            return subprocess.run([cmd], shell=True)

    def run_clean(self) -> Union[str, "subprocess.CompletedProcess[bytes]"]:
        logger.info(f"Unlink {self._fn_committed}")
        logger.info(f"Unlink {self._fn_committed_sorted}")
        logger.info(f"Unlink {self._fn_deferred}")
        if self.is_single_end:
            logger.info(f"Unlink {self._fn_deferred_fq_se}")
        else:
            logger.info(f"Unlink {self._fn_committed_pe}")
            logger.info(f"Unlink {self._fn_deferred_pe_fq1}")
            logger.info(f"Unlink {self._fn_deferred_pe_fq2}")
            logger.info(f"Unlink {self._fn_deferred_pe}")
            logger.info(f"Unlink {self._fn_deferred_pe_sortn}")
            logger.info(f"Unlink {self._fn_deferred_realigned_pe}")
            logger.info(f"Unlink {self._fn_deferred_realigned_pe_sortn}")
            logger.info(f"Unlink {self._fn_deferred_reconciled}")
        if not self.dryrun:
            self._fn_committed.unlink()
            self._fn_committed_sorted.unlink()
            self._fn_deferred.unlink()
            if self.is_single_end:
                self._fn_deferred_fq_se.unlink()
            else:
                self._fn_committed_pe.unlink()
                self._fn_deferred_pe_fq1.unlink()
                self._fn_deferred_pe_fq2.unlink()
                self._fn_deferred_pe.unlink()
                self._fn_deferred_pe_sortn.unlink()
                self._fn_deferred_realigned_pe.unlink()
                self._fn_deferred_realigned_pe_sortn.unlink()
                self._fn_deferred_reconciled.unlink()
            if self.out_format == "cram":
                # Removes the final BAM file if the output format is set to
                # CRAM.
                self._fn_final.unlink()
                (self._fn_final.with_suffix(".bam.bai")).unlink()

    def _set_filenames(self):
        """Update filenames potentially used when running the workflow."""
        # Accornyms to shorten the lines for readability
        o_dir = self.out_prefix.parent
        o_prefix = self.out_prefix.name
        o_fmt = "bam"

        self._fn_committed = o_dir / f"{o_prefix}-committed.{o_fmt}"
        self._fn_committed_sorted = (
            o_dir / f"{o_prefix}-committed-sorted.{o_fmt}"
        )
        self._fn_deferred = o_dir / f"{o_prefix}-deferred.{o_fmt}"
        self._fn_final = o_dir / f"{o_prefix}-final.{o_fmt}"

        # paired-end
        self._prefix_deferred_pe = o_dir / f"{o_prefix}-paired"
        self._fn_deferred_pe_fq1 = (
            o_dir / f"{o_prefix}-paired-deferred-R1.fq.gz"
        )
        self._fn_deferred_pe_fq2 = (
            o_dir / f"{o_prefix}-paired-deferred-R2.fq.gz"
        )
        self._fn_committed_pe = o_dir / f"{o_prefix}-paired-committed.{o_fmt}"
        self._fn_deferred_pe = o_dir / f"{o_prefix}-paired-deferred.{o_fmt}"
        self._fn_deferred_pe_sortn = (
            o_dir / f"{o_prefix}-paired-deferred-sorted_n.{o_fmt}"
        )
        self._fn_deferred_realigned_pe = (
            o_dir / f"{o_prefix}-paired-realigned.{o_fmt}"
        )
        self._fn_deferred_realigned_pe_sortn = (
            o_dir / f"{o_prefix}-paired-realigned-sorted_n.{o_fmt}"
        )
        self._fn_deferred_reconciled = (
            o_dir / f"{o_prefix}-paired-deferred-reconciled-sorted.{o_fmt}"
        )

        # single-end
        self._fn_deferred_fq_se = o_dir / f"{o_prefix}-deferred.fq.gz"
        self._fn_deferred_realigned_se = o_dir / f"{o_prefix}-realigned.{o_fmt}"

    def run_workflow(self):
        # TODO
        # run_intial_align()

        self._set_filenames()

        self.run_leviosam2()

        self.run_sort_committed()

        if self.is_single_end:
            self.run_bam_to_fastq_se()
            self.run_realign_deferred()
        else:
            self.run_collate_pe()
            self.run_realign_deferred()
            self.run_refflow_merge_pe()

        self.run_merge_and_index()

        if self.out_format == "cram":
            self.run_bam_to_cram()

        if not self.keep_tmp_files:
            self.run_clean()

    def check_inputs(self) -> None:
        """Check if input files exist."""
        check_file_exists(self.target_fasta)
        check_file_exists(self.input_bam)

    def __init__(self, args: argparse.Namespace) -> None:
        self.aligner = args.aligner
        self.aligner_exe = args.aligner_exe
        if self.aligner_exe == "auto":
            # Sets `self.aligner_exe``
            self._infer_aligner_exe()
        self.sequence_type = args.sequence_type
        self.is_single_end = self.sequence_type not in ["ilmn_pe"]
        self.read_group = args.read_group
        self.num_threads = args.num_threads
        self.source_label = args.source_label
        self.target_label = args.target_label

        self.dryrun = args.dryrun
        self.forcerun = args.forcerun
        self.keep_tmp_files = args.keep_tmp_files

        # executables
        self.samtools = args.samtools_exe
        self.bgzip = args.bgzip_exe
        self.gtime = args.gnu_time_exe
        self.leviosam2 = args.leviosam2_exe

        # Computational performance measurement related
        self.measure_time = args.measure_time
        self.time_cmd = ""
        if args.measure_time:
            self.time_cmd = (
                f"{self.gtime} -v " f"-ao {self.out_prefix}.time_log "
            )

        # Converts file name strings to pathlib.Path()
        self.target_fasta = pathlib.Path(args.target_fasta)
        self.input_bam = pathlib.Path(args.input_bam)
        self.out_prefix = pathlib.Path(args.out_prefix)
        self.leviosam2_index = pathlib.Path(args.leviosam2_index)
        self.out_format = args.out_format
        if args.target_aligner_index:
            self.target_aligner_index = pathlib.Path(args.target_aligner_index)
        else:
            logger.info(
                f"`--target_aligner_index` is empty. Set to {args.target_fasta}"
            )
            self.target_aligner_index = pathlib.Path(args.target_fasta)

        self.lift_commit_min_mapq = args.lift_commit_min_mapq
        self.lift_commit_min_score = args.lift_commit_min_score
        self.lift_commit_max_frac_clipped = args.lift_commit_max_frac_clipped
        self.lift_commit_max_isize = args.lift_commit_max_isize
        self.lift_commit_max_hdist = args.lift_commit_max_hdist
        self.lift_max_gap = args.lift_max_gap
        self.lift_bed_commit_source = args.lift_bed_commit_source
        self.lift_bed_defer_target = args.lift_bed_defer_target
        self.lift_realign_config = args.lift_realign_config


class Leviosam2WorkflowIlmnPreset(Leviosam2Workflow):
    """Workflow for Ilmn reads."""

    def __init__(self, args: argparse.Namespace) -> None:
        super().__init__(args=args)
        if self.sequence_type not in ["ilmn_se", "ilmn_pe"]:
            raise ValueError(
                "Invalid sequence type for Leviosam2WorkflowIlmn: "
                f"`{self.sequence_type}`"
            )
        if self.aligner not in ["bowtie2", "bwamem", "bwamem2"]:
            raise ValueError(
                f"Invalid aligner for Leviosam2WorkflowIlmn: `{self.aligner}`"
            )
        if args.use_preset:
            self.lift_max_gap = 0
            self.lift_commit_max_hdist = 5
            if self.aligner == "bowtie2":
                self.lift_commit_min_mapq = 10
                self.lift_commit_min_score = -10
            elif self.aligner in ["bwamem", "bwamem2"]:
                self.lift_commit_min_mapq = 30
                self.lift_commit_min_score = 100


class Leviosam2WorkflowPacbioHifiPreset(Leviosam2Workflow):
    """Workflow for Ilmn reads."""

    def __init__(self, args: argparse.Namespace) -> None:
        super().__init__(args=args)
        if self.sequence_type not in ["pb_hifi"]:
            raise ValueError(
                "Invalid sequence type for Leviosam2WorkflowPacbioHifiPreset: "
                f"`{self.sequence_type}`"
            )
        if self.aligner not in ["minimap2", "winnowmap2"]:
            raise ValueError(
                "Invalid aligner for Leviosam2WorkflowPacbioHifiPreset: "
                f"`{self.aligner}`"
            )
        if args.use_preset:
            self.lift_max_gap = 1000
            self.lift_commit_max_hdist = 100


class Leviosam2WorkflowOntPreset(Leviosam2Workflow):
    """Workflow for Ilmn reads."""

    def __init__(self, args: argparse.Namespace) -> None:
        super().__init__(args=args)
        if self.sequence_type not in ["ont"]:
            raise ValueError(
                "Invalid sequence type for Leviosam2WorkflowOntPreset: "
                f"`{self.sequence_type}`"
            )
        if self.aligner not in ["minimap2", "winnowmap2"]:
            raise ValueError(
                "Invalid aligner for Leviosam2WorkflowOntPreset: "
                f"`{self.aligner}`"
            )
        if args.use_preset:
            self.lift_max_gap = 1500
            self.lift_commit_max_hdist = 6000


def run_workflow(args: argparse.Namespace):
    if args.use_preset:
        if args.sequence_type in ["ilmn_se", "ilmn_pe"]:
            if args.aligner in ["bowtie2", "bwamem", "bwamem2"]:
                workflow = Leviosam2WorkflowIlmnPreset(args)
        elif args.sequence_type == "pb_hifi":
            if args.aligner in ["minimap2", "winnowmap2"]:
                workflow = Leviosam2WorkflowPacbioHifiPreset(args)
        elif args.sequence_type == "ont":
            if args.aligner in ["minimap2", "winnowmap2"]:
                workflow = Leviosam2WorkflowOntPreset(args)
        else:
            raise ValueError(
                "There is no preset for the sequence_type and aligner combination of "
                f"{args.sequence_type} and {args.aligner}"
            )
    else:
        workflow = Leviosam2Workflow(args)
    workflow.check_inputs()
    workflow.validate_executables()
    workflow.run_workflow()


if __name__ == "__main__":
    args = parse_args()
    run_workflow(args)
